Back to Article
Data Screening
Download Source

Data Screening

Author

Ian Contreras

Importaciones

Librerías

In [2]:
#| label: importing-libraries
# Manejo de datos y análisis
import numpy as np
import pandas as pd
import scipy.stats as stats

# Modelos estadísticos y econométricos
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.stattools import adfuller
from pmdarima.arima import auto_arima
from arch import arch_model

# Visualización
import matplotlib.pyplot as plt

# Presentación en IPython
from IPython.display import Markdown, HTML

Data

In [3]:
#| label: importing-data
df = pd.read_csv(r'..\data\csv\irp.csv', parse_dates=[
                 'date'], index_col='date')
returns = df['price_return']
split_date = '2020-12-31'
R_test = df[df.index >= split_date]['price_return'].rolling(
    window=5).std().dropna()

Análisis Exploratorio.

Serie Histórica

In [4]:
#| label: fig-price-return-series
#| fig-cap: Serie de retornos diarios IRP-GOBIX.
fig, ax = plt.subplots(figsize=(12, 10))
ax.plot(df.index, df['price_return'])
ax.set_ylabel('Retorno Precio (BPS)')
ax.set_xlim([df.index.min(), df.index.max()])
ax.legend()
plt.show()
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.
Serie de retornos diarios IRP-GOBIX.

Análisis descriptivo de la muestra

In [5]:
#| label: return-descriptive-stats
#| fig-cap: Tabla de las estadísticas descriptiva de la serie de retornos
desc_stats = returns.describe()

skewness = returns.skew()
kurtosis = returns.kurtosis()
jb_test = sm.stats.jarque_bera(returns)

descriptive_table = pd.DataFrame({
    'Observations': [int(desc_stats['count'])],
    'Mean': [desc_stats['mean']],
    'Median': [desc_stats['50%']],
    'Std. Dev': [desc_stats['std']],
    'Skewness': [skewness],
    'Kurtosis': [kurtosis],
    'Jarque-Bera': [jb_test[0]],
    'Prob.': [jb_test[1]]
})
Markdown(descriptive_table.to_markdown(index=False))
Observations Mean Median Std. Dev Skewness Kurtosis Jarque-Bera Prob.
1941 1.35325 -0.318044 23.5029 0.70527 7.588 4789.53 0

Tabla de las estadísticas descriptiva de la serie de retornos

Ajuste de distribuciones

In [6]:
#| label: fig-distribution-fitting
#| fig-cap: Ajuste de distribuciones a los retornos de precio.
distributions = [stats.norm, stats.t]

plt.figure(figsize=(10, 6))
plt.hist(returns, bins=50, density=True, alpha=0.6, label='Resultados Empíricos')

for dist in distributions:
    params = dist.fit(returns)
    x = np.linspace(returns.min(), returns.max(), 100)
    plt.plot(x, dist.pdf(x, *params), label=f'{dist.name} fit')

plt.xlabel('Retorno Precio (BPS)')
plt.ylabel('Verosimilitud')
plt.legend()
plt.show()
Ajuste de distribuciones a los retornos de precio.

Q-Q plot con respecto a distribución t

In [7]:
#| label: fig-tqq-plot
#| fig-cap: Q-Q Plot de retornos de precio contra la distribución t.
t_params = stats.t.fit(returns)

fig, ax = plt.subplots(figsize=(12, 10))
stats.probplot(returns, dist="t", sparams=t_params, plot=plt)
plt.grid(True)
plt.show()
Q-Q Plot de retornos de precio contra la distribución t.

Kolmorov Smirnov test para distribución t

In [8]:
#| label: kolmorov-smirnov
ks_stat, p_value = stats.kstest(returns, 't', args=t_params)
print(f"Kolmogorov-Smirnov statistic: {ks_stat}")
print(f"P-value: {p_value}")
Kolmogorov-Smirnov statistic: 0.028557323574938343
P-value: 0.0827419967953622

Test de estacionariedad/ robustez

In [9]:
#| label: adf
result = adfuller(returns)
print('ADF Statistic:', result[0])
print('p-value:', result[1])
ADF Statistic: -13.174114850736395
p-value: 1.2332976276203498e-24

Test de autocorrelación

In [10]:
#| label: fig-acf-pacf
#| fig-cap: Función de autocorrelación (ACF) y función de autocorrelación parcial (PACF) de los retornos.
fig, axes = plt.subplots(2, 1, figsize=(10, 8))
plot_acf(returns, lags=24, ax=axes[0])
axes[0].set_title('Función de Autocorrelación (ACF)')

plot_pacf(returns, lags=12, ax=axes[1])
axes[1].set_title('Función de Autocorrelación Parcial (PACF)')

plt.tight_layout()
plt.show()
Función de autocorrelación (ACF) y función de autocorrelación parcial (PACF) de los retornos.

Resultados

Estimación del modelo ARIMA

In [11]:
#| label: arima-model
#| fig-cap: Modelo ARIMA de maxima verosimilitud para la serie de retornos.
model_auto = auto_arima(returns)
model_auto.summary()
SARIMAX Results
Dep. Variable: y No. Observations: 1941
Model: SARIMAX(3, 0, 4) Log Likelihood -8853.517
Date: Sat, 19 Oct 2024 AIC 17725.034
Time: 16:58:02 BIC 17775.173
Sample: 0 HQIC 17743.472
- 1941
Covariance Type: opg
coef std err z P>|z| [0.025 0.975]
intercept 0.3602 0.273 1.317 0.188 -0.176 0.896
ar.L1 0.1366 0.065 2.106 0.035 0.009 0.264
ar.L2 -0.1931 0.066 -2.934 0.003 -0.322 -0.064
ar.L3 0.7691 0.061 12.644 0.000 0.650 0.888
ma.L1 -0.1139 0.065 -1.752 0.080 -0.241 0.014
ma.L2 0.2163 0.067 3.220 0.001 0.085 0.348
ma.L3 -0.7032 0.062 -11.315 0.000 -0.825 -0.581
ma.L4 0.0750 0.020 3.715 0.000 0.035 0.115
sigma2 536.2183 8.358 64.154 0.000 519.836 552.600
Ljung-Box (L1) (Q): 0.00 Jarque-Bera (JB): 4395.18
Prob(Q): 1.00 Prob(JB): 0.00
Heteroskedasticity (H): 1.11 Skew: 0.59
Prob(H) (two-sided): 0.17 Kurtosis: 10.28


Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).

Modelo ARIMA de maxima verosimilitud para la serie de retornos.

Estimación del AR-GARCH

In [12]:
#| label: garch-model-fitting
#| fig-cap: Ajuste del modelo Zero-Garch ala serie de retornos
ar = arch_model(returns, mean='Zero', vol='GARCH', dist='t')
res = ar.fit(last_obs=split_date)
Iteration:      1,   Func. Count:      6,   Neg. LLF: 13422.240578344366
Iteration:      2,   Func. Count:     13,   Neg. LLF: 9134.230609110135
Iteration:      3,   Func. Count:     19,   Neg. LLF: 8666.557319151609
Iteration:      4,   Func. Count:     25,   Neg. LLF: 8791.275490419279
Iteration:      5,   Func. Count:     31,   Neg. LLF: 8082.081172585756
Iteration:      6,   Func. Count:     37,   Neg. LLF: 8739.332058159429
Iteration:      7,   Func. Count:     43,   Neg. LLF: 8658.860183258712
Iteration:      8,   Func. Count:     49,   Neg. LLF: 7638.6451038734085
Iteration:      9,   Func. Count:     55,   Neg. LLF: 7626.627815474433
Iteration:     10,   Func. Count:     60,   Neg. LLF: 7624.619361750198
Iteration:     11,   Func. Count:     65,   Neg. LLF: 7623.76815711753
Iteration:     12,   Func. Count:     70,   Neg. LLF: 7623.223103893486
Iteration:     13,   Func. Count:     75,   Neg. LLF: 7623.048366501091
Iteration:     14,   Func. Count:     80,   Neg. LLF: 7622.998678403758
Iteration:     15,   Func. Count:     85,   Neg. LLF: 7622.986214899395
Iteration:     16,   Func. Count:     90,   Neg. LLF: 7622.975788918075
Iteration:     17,   Func. Count:     95,   Neg. LLF: 7622.9611635250085
Iteration:     18,   Func. Count:    100,   Neg. LLF: 7622.937610163095
Iteration:     19,   Func. Count:    105,   Neg. LLF: 7622.908361612077
Iteration:     20,   Func. Count:    110,   Neg. LLF: 7622.881904359898
Iteration:     21,   Func. Count:    115,   Neg. LLF: 7622.870928301752
Iteration:     22,   Func. Count:    120,   Neg. LLF: 7622.868967833485
Iteration:     23,   Func. Count:    125,   Neg. LLF: 7622.868843481545
Iteration:     24,   Func. Count:    130,   Neg. LLF: 7622.868828856013
Iteration:     25,   Func. Count:    135,   Neg. LLF: 7622.868827051428
Iteration:     26,   Func. Count:    139,   Neg. LLF: 7622.868827051394
Optimization terminated successfully    (Exit mode 0)
            Current function value: 7622.868827051428
            Iterations: 26
            Function evaluations: 139
            Gradient evaluations: 26

Resultados del AR-GARCH

In [13]:
#| label: garch-model
#| fig-cap: Modelo Zero-Garch de la serie de retornos
HTML(res.summary().as_html())
Zero Mean - GARCH Model Results
Dep. Variable: price_return R-squared: 0.000
Mean Model: Zero Mean Adj. R-squared: 0.001
Vol Model: GARCH Log-Likelihood: -7622.87
Distribution: Standardized Student's t AIC: 15253.7
Method: Maximum Likelihood BIC: 15275.6
No. Observations: 1753
Date: Sat, Oct 19 2024 Df Residuals: 1753
Time: 16:58:02 Df Model: 0
Volatility Model
coef std err t P>|t| 95.0% Conf. Int.
omega 58.8679 25.182 2.338 1.940e-02 [ 9.512,1.082e+02]
alpha[1] 0.1892 6.657e-02 2.842 4.482e-03 [5.873e-02, 0.320]
beta[1] 0.7737 6.832e-02 11.324 9.943e-30 [ 0.640, 0.908]
Distribution
coef std err t P>|t| 95.0% Conf. Int.
nu 2.7894 0.230 12.114 8.853e-34 [ 2.338, 3.241]


Covariance estimator: robust

Modelo Zero-Garch de la serie de retornos

Residuos del AR-GARCH

In [14]:
#| label: fig-garch-residuals
#| fig-cap: Residuos del modelo AR-GARCH.
fig = res.plot()
Residuos del modelo AR-GARCH.

Residuos contra volatilidad condicional

In [15]:
#| label: fig-residuals-vs-volatility
#| fig-cap: Residuos contra la volatilidad condicional del AR-GARCH.
std_resid = res.resid / res.conditional_volatility
unit_var_resid = res.resid / res.resid.std()
df = pd.concat([std_resid, unit_var_resid], axis=1)
df.columns = ["Residuos Estandarizados", "Residuos de Varianza Unitaria"]
subplot = df.plot(kind="kde", xlim=(-4, 4))
Residuos contra la volatilidad condicional del AR-GARCH.

Evaluación Retrospectiva (Backtesting)

Conversion de varianza predicha.

In [16]:
#| label: forecast
forecasts = res.forecast(horizon=1)
forecasted_std = np.sqrt(forecasts.variance)
forecasted_std = forecasted_std[forecasted_std.index >
                                split_date].loc[R_test.index, 'h.1']

MAPE del modelo

In [17]:
#| label: mape
def MAPE(actual, predicted):
    actual_std = actual.rolling(window=5).std().dropna()
    return np.mean(np.abs((actual_std - predicted) / actual_std)) * 100

mape_value = MAPE(R_test, forecasted_std)
print(f"MAPE on Test Set (based on moving std): {mape_value:.2f}%")
MAPE on Test Set (based on moving std): 739.70%

Predichos vs Actuales Plot-Backtested

In [18]:
#| label: fig-predicted-vs-actual-backtest
#| fig-cap: Desviación Estándar Móvil Real vs Pronosticada - Modelo Zero-GARCH
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(R_test.index, R_test,
        label='Desviación Estándar Móvil Real', color='blue')
ax.plot(forecasted_std.index,
        forecasted_std, label='Desviación Estándar Móvil Pronosticada', color='red', linestyle='--')
ax.set_xlim([forecasted_std.index.min(), forecasted_std.index.max()])
plt.legend()
plt.show()
Desviación Estándar Móvil Real vs Pronosticada - Modelo Zero-GARCH